"""
Tests for GenericDownloader.
"""

import pytest

from src.local_deep_research.research_library.downloaders.generic import (
    GenericDownloader,
)
from src.local_deep_research.research_library.downloaders.base import (
    ContentType,
)


class TestGenericCanHandle:
    """Tests for GenericDownloader.can_handle()."""

    @pytest.fixture
    def downloader(self):
        return GenericDownloader(timeout=30)

    def test_can_handle_any_url(self, downloader):
        """Generic downloader handles any URL as fallback."""
        assert downloader.can_handle("https://example.com/paper.pdf") is True
        assert downloader.can_handle("https://random-site.org/article") is True
        assert downloader.can_handle("https://arxiv.org/abs/2301.12345") is True

    def test_can_handle_empty_url(self, downloader):
        """Handles empty URL (fallback behavior)."""
        assert downloader.can_handle("") is True

    def test_can_handle_invalid_url(self, downloader):
        """Handles invalid URL (fallback behavior)."""
        assert downloader.can_handle("not a url") is True


class TestGenericDownload:
    """Tests for GenericDownloader.download()."""

    @pytest.fixture
    def downloader(self):
        return GenericDownloader(timeout=30)

    def test_download_pdf_success(self, downloader, mocker, mock_pdf_content):
        """Successfully downloads PDF."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        content = downloader.download(
            "https://example.com/paper.pdf", ContentType.PDF
        )
        assert content is not None
        assert content == mock_pdf_content

    def test_download_pdf_failure(self, downloader, mocker):
        """Returns None when PDF download fails."""
        mock_response = mocker.Mock()
        mock_response.status_code = 404
        mock_response.content = b""
        mock_response.headers = {}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        content = downloader.download(
            "https://example.com/paper.pdf", ContentType.PDF
        )
        assert content is None

    def test_download_text_from_pdf(self, downloader, mocker, mock_pdf_content):
        """Downloads PDF and extracts text for TEXT content type."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")
        # Mock text extraction (minimal PDF has no text)
        mocker.patch.object(
            GenericDownloader,
            "extract_text_from_pdf",
            return_value="Extracted text content",
        )

        content = downloader.download(
            "https://example.com/paper.pdf", ContentType.TEXT
        )
        assert content is not None
        assert b"Extracted text content" in content


class TestGenericDownloadWithResult:
    """Tests for GenericDownloader.download_with_result()."""

    @pytest.fixture
    def downloader(self):
        return GenericDownloader(timeout=30)

    def test_download_with_result_success(
        self, downloader, mocker, mock_pdf_content
    ):
        """Returns success result for successful download."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://example.com/paper.pdf"
        )
        assert result.is_success is True
        assert result.content is not None

    def test_download_with_result_404(self, downloader, mocker):
        """Returns skip reason for 404 error."""
        # First call for PDF download fails
        mock_pdf_response = mocker.Mock()
        mock_pdf_response.status_code = 404
        mock_pdf_response.content = b""
        mock_pdf_response.headers = {}

        # Diagnostic call also returns 404
        call_count = [0]

        def mock_get(url, **kwargs):
            call_count[0] += 1
            mock_resp = mocker.Mock()
            mock_resp.status_code = 404
            mock_resp.content = b""
            mock_resp.headers = {}
            return mock_resp

        mocker.patch.object(downloader.session, "get", side_effect=mock_get)
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://example.com/missing.pdf"
        )
        assert result.is_success is False
        assert (
            "404" in result.skip_reason
            or "not found" in result.skip_reason.lower()
        )

    def test_download_with_result_403(self, downloader, mocker):
        """Returns skip reason for 403 error."""
        mock_response = mocker.Mock()
        mock_response.status_code = 403
        mock_response.content = b""
        mock_response.headers = {}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://example.com/protected.pdf"
        )
        assert result.is_success is False
        assert (
            "403" in result.skip_reason
            or "denied" in result.skip_reason.lower()
        )

    def test_download_with_result_html_response(self, downloader, mocker):
        """Returns skip reason when HTML is returned instead of PDF."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = b"<html><body>Login page</body></html>"
        mock_response.headers = {"content-type": "text/html"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result("https://example.com/paper")
        assert result.is_success is False
        assert (
            "login" in result.skip_reason.lower()
            or "subscription" in result.skip_reason.lower()
        )

    def test_download_with_result_timeout(self, downloader, mocker):
        """Returns skip reason for timeout."""
        import requests

        mocker.patch.object(
            downloader.session,
            "get",
            side_effect=requests.exceptions.Timeout("Timeout"),
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://slow-server.com/paper.pdf"
        )
        assert result.is_success is False
        assert "timed" in result.skip_reason.lower()

    def test_download_with_result_connection_error(self, downloader, mocker):
        """Returns skip reason for connection error."""
        import requests

        mocker.patch.object(
            downloader.session,
            "get",
            side_effect=requests.exceptions.ConnectionError(
                "Connection refused"
            ),
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://unreachable.com/paper.pdf"
        )
        assert result.is_success is False
        assert (
            "connect" in result.skip_reason.lower()
            or "down" in result.skip_reason.lower()
        )


class TestGenericPdfExtensionFallback:
    """Tests for trying .pdf extension fallback."""

    @pytest.fixture
    def downloader(self):
        return GenericDownloader(timeout=30)

    def test_tries_pdf_extension(self, downloader, mocker, mock_pdf_content):
        """Tries adding .pdf extension when URL doesn't end with .pdf."""
        call_count = [0]
        urls_called = []

        def mock_get(url, **kwargs):
            call_count[0] += 1
            urls_called.append(url)
            mock_resp = mocker.Mock()

            # First call (without .pdf) fails, second (with .pdf) succeeds
            if url.endswith(".pdf"):
                mock_resp.status_code = 200
                mock_resp.content = mock_pdf_content
                mock_resp.headers = {"content-type": "application/pdf"}
            else:
                mock_resp.status_code = 404
                mock_resp.content = b""
                mock_resp.headers = {}

            return mock_resp

        mocker.patch.object(downloader.session, "get", side_effect=mock_get)
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        downloader._download_pdf("https://example.com/paper")

        # Should have tried both URLs
        assert any("paper.pdf" in url for url in urls_called)

    def test_no_double_pdf_extension(
        self, downloader, mocker, mock_pdf_content
    ):
        """Doesn't add .pdf if URL already ends with .pdf."""
        urls_called = []

        def mock_get(url, **kwargs):
            urls_called.append(url)
            mock_resp = mocker.Mock()
            mock_resp.status_code = 200
            mock_resp.content = mock_pdf_content
            mock_resp.headers = {"content-type": "application/pdf"}
            return mock_resp

        mocker.patch.object(downloader.session, "get", side_effect=mock_get)
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        downloader._download_pdf("https://example.com/paper.pdf")

        # Should not have URLs ending with .pdf.pdf
        assert not any(".pdf.pdf" in url for url in urls_called)


class TestGenericTextExtraction:
    """Tests for text extraction from downloaded PDFs."""

    @pytest.fixture
    def downloader(self):
        return GenericDownloader(timeout=30)

    def test_download_with_result_text_success(
        self, downloader, mocker, mock_pdf_content
    ):
        """Successfully extracts text from PDF."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")
        mocker.patch.object(
            GenericDownloader,
            "extract_text_from_pdf",
            return_value="Extracted text",
        )

        result = downloader.download_with_result(
            "https://example.com/paper.pdf",
            ContentType.TEXT,
        )
        assert result.is_success is True
        assert b"Extracted text" in result.content

    def test_download_with_result_text_extraction_failed(
        self, downloader, mocker, mock_pdf_content
    ):
        """Returns skip reason when text extraction fails."""
        mock_response = mocker.Mock()
        mock_response.status_code = 200
        mock_response.content = mock_pdf_content
        mock_response.headers = {"content-type": "application/pdf"}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")
        mocker.patch.object(
            GenericDownloader,
            "extract_text_from_pdf",
            return_value=None,  # Extraction failed
        )

        result = downloader.download_with_result(
            "https://example.com/paper.pdf",
            ContentType.TEXT,
        )
        assert result.is_success is False
        assert "extraction" in result.skip_reason.lower()

    def test_download_with_result_text_pdf_not_found(self, downloader, mocker):
        """Returns skip reason when PDF cannot be downloaded for text extraction."""
        mock_response = mocker.Mock()
        mock_response.status_code = 404
        mock_response.content = b""
        mock_response.headers = {}

        mocker.patch.object(
            downloader.session, "get", return_value=mock_response
        )
        mocker.patch.object(
            downloader.rate_tracker, "apply_rate_limit", return_value=0
        )
        mocker.patch.object(downloader.rate_tracker, "record_outcome")

        result = downloader.download_with_result(
            "https://example.com/missing.pdf",
            ContentType.TEXT,
        )
        assert result.is_success is False
        assert "download" in result.skip_reason.lower()
